LOB + Liquidity Measures

  • Feedforward model with three hidden layers (see the FFNN3 summary below).

  • Trained using LOB and Liquidity measures together: Input dimension is 60 x 44 = 2640

  • Mean squared error used as loss function.

  • Optimizer: Adam

  • Training set consists of 202 days (~80%)

  • Validation set consists of 51 days (~20%)

Shown below:

  • Network structure

  • Settings used for training

  • Plots of model performance

Network Structure

In [1]:
import os
import torch
from torchsummary import summary
from feedforward_three_layer import FFNN3
from IPython.display import HTML


def hide_code():
    """Return an HTML/JS widget that toggles visibility of the notebook's code cells."""
    return HTML('''<script>code_show=true; function code_toggle() {if (code_show){$('div.input').hide();} else {$('div.input').show();}code_show = !code_show} $( document ).ready(code_toggle);</script><form action="javascript:code_toggle()"><input type="submit" value="Click here to toggle on/off the raw code."></form>''')


model = FFNN3()
# Find the checkpoint file by name. The previous one-liner relied on
# list.index(True) matching a count of exactly 1 ('ckpt' appearing once in the
# filename); it broke if 'ckpt' occurred twice or not at all. StopIteration here
# means no checkpoint file is present in the working directory.
ckpt_name = next(f for f in os.listdir() if 'ckpt' in f)
ckpt = torch.load('./' + ckpt_name, map_location=torch.device('cpu'))
print(f'Input size: {60*44}')
summary(model.float(), input_size=(60, 44))
hide_code()
Input size: 2640
----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
================================================================
            Linear-1                   [-1, 64]         169,024
       BatchNorm1d-2                   [-1, 64]             128
         LeakyReLU-3                   [-1, 64]               0
           Dropout-4                   [-1, 64]               0
            Linear-5                   [-1, 64]           4,160
       BatchNorm1d-6                   [-1, 64]             128
         LeakyReLU-7                   [-1, 64]               0
            Linear-8                   [-1, 64]           4,160
       BatchNorm1d-9                   [-1, 64]             128
        LeakyReLU-10                   [-1, 64]               0
           Linear-11                    [-1, 5]             325
      BatchNorm1d-12                    [-1, 5]              10
        LeakyReLU-13                    [-1, 5]               0
================================================================
Total params: 178,063
Trainable params: 178,063
Non-trainable params: 0
----------------------------------------------------------------
Input size (MB): 0.01
Forward/backward pass size (MB): 0.00
Params size (MB): 0.68
Estimated Total Size (MB): 0.69
----------------------------------------------------------------
Out[1]:

Settings Used in Training

In [2]:
import pandas as pd
from configs.train_config import cfg

# Build one table of the settings used in training, grouped by config section.
_sections = [
    ('USED IN TRAINING', pd.DataFrame.from_dict(cfg, 'index').loc[['STOCK', 'LOB', 'LIQ_VARS']]),
    ('TRAIN', pd.DataFrame.from_dict(cfg.TRAIN, 'index').loc[['SHUFFLE', 'VAL_SHUFFLE', 'INTRADAY_SHUFFLE',
                                                              'SPLIT_RATIO', 'BATCH_SIZE', 'VAL_BATCH_SIZE']]),
    ('MODEL', pd.DataFrame.from_dict(cfg.MODEL, 'index').loc[['BACKBONE', 'LOSS', 'DROPOUT_RATE', 'LEAKY_SLOPE']]),
    ('OPTIMIZER', pd.DataFrame.from_dict(cfg.OPTIMIZER, 'index').loc[['LR', 'METHOD', 'LR_SCHEDULER', 'LAMBDA']]),
]
configuration = pd.concat([frame for _, frame in _sections], keys=[label for label, _ in _sections])
configuration.columns = ['CONFIGURATIONS']
# ckpt_name is set in the checkpoint-loading cell above; e.g. 'ckpt_30000.pth' -> '30000'.
print(f'Epoch loaded: {ckpt_name[5:-4]}')
configuration
Epoch loaded: 30000
Out[2]:
CONFIGURATIONS
USED IN TRAINING STOCK GARAN
LOB True
LIQ_VARS True
TRAIN SHUFFLE False
VAL_SHUFFLE False
INTRADAY_SHUFFLE False
SPLIT_RATIO 0.8
BATCH_SIZE 202
VAL_BATCH_SIZE 1
MODEL BACKBONE FFNN3
LOSS MSE
DROPOUT_RATE 0
LEAKY_SLOPE 0.5
OPTIMIZER LR 0.001
METHOD adam
LR_SCHEDULER ReduceLROnPlateau
LAMBDA 0
In [3]:
import sys
sys.path.append('./../')
import numpy as np
from sklearn.linear_model import Ridge, LinearRegression

# ---- Collect the per-day data files and split chronologically into train/val ----
# Filtering with a comprehension replaces the old pop-while-enumerating pattern,
# which skipped entries whenever two non-.npy files (e.g. .DS_Store) were adjacent.
filenames = sorted(f for f in os.listdir(cfg.DATA.DATA_PATH) if f.endswith('.npy'))
if cfg.DATA.PORTION is not None:
    filenames = filenames[:int(len(filenames) * cfg.DATA.PORTION)]
n_train = int(len(filenames) * cfg.TRAIN.SPLIT_RATIO)
train_datanames = filenames[:n_train]
# Defined unconditionally here; the old loop only assigned val_datanames inside
# the loop body and left it undefined for degenerate split ratios.
val_datanames = filenames[n_train:]


def _load_split(names):
    """Load each day's saved {'X': ..., 'y': ...} dict; return (X tensors, y tensors)."""
    Xs, ys = [], []
    for name in names:
        item = np.load(os.path.join(cfg.DATA.DATA_PATH, name), allow_pickle=True).item()
        Xs.append(torch.from_numpy(item['X']))
        ys.append(torch.from_numpy(item['y']))
    return Xs, ys


X_t, y_t = _load_split(train_datanames)
X_v, y_v = _load_split(val_datanames)

# ---- Run the trained network in inference mode ----
model.double()
model.load_state_dict(ckpt['model_state'], strict=False)
torch.set_grad_enabled(False)
model.eval()  # BatchNorm layers use running statistics, so batch size 1 is safe

# One forward pass per minute-sample; torch.cat preserves iteration order, so
# row i of the prediction matrix corresponds to row i of the stacked targets.
y_t_pred = torch.cat([model(k.unsqueeze(0))[0] for day in X_t for k in day]).reshape(-1, 5)
y_v_pred = torch.cat([model(k.unsqueeze(0))[0] for day in X_v for k in day]).reshape(-1, 5)
y_t = torch.cat(y_t)
y_v = torch.cat(y_v)

# Per-sample squared error and absolute percentage error for each of the 5 targets.
se_train = (y_t_pred - y_t) ** 2 ; se_val = (y_v_pred - y_v) ** 2
ape_train = 100 * (1 - y_t_pred / y_t).abs() ; ape_val = 100 * (1 - y_v_pred / y_v).abs()

mse_train = se_train.mean(dim=0).numpy() ; mse_val = se_val.mean(dim=0).numpy()
mape_train = ape_train.mean(dim=0).numpy() ; mape_val = ape_val.mean(dim=0).numpy()


##### ##### ##### ##### ##### Linear Regression ##### ##### ##### ##### #####

# Flatten each (60, 44) window into one feature row. A bias column of ones is
# prepended to match the original design matrix; LinearRegression also fits its
# own intercept, so the column is redundant but kept for backward compatibility.
def _design_matrix(day_tensors):
    """Stack the days, flatten each window, and prepend a ones column."""
    flat = torch.cat(day_tensors).flatten(1, 2)
    bias = torch.ones((flat.shape[0], 1), dtype=torch.float64)
    return torch.cat([bias, flat], 1).numpy()


X_reg = _design_matrix(X_t)
X_v_reg = _design_matrix(X_v)

regr = LinearRegression()
regr.fit(X_reg, y_t.numpy())
y_t_regs = regr.predict(X_reg) ; y_v_regs = regr.predict(X_v_reg)

se_train_reg = (y_t - y_t_regs) ** 2 ; se_val_reg = (y_v - y_v_regs) ** 2
ape_train_reg = 100 * (1 - y_t_regs / y_t).abs() ; ape_val_reg = 100 * (1 - y_v_regs / y_v).abs()

reg_train_mses = se_train_reg.mean(dim=0).numpy() ; reg_mses = se_val_reg.mean(dim=0).numpy()
reg_train_mapes = ape_train_reg.mean(dim=0).numpy() ; reg_mapes = ape_val_reg.mean(dim=0).numpy()

##### ##### ##### ##### ##### Save Errors ##### ##### ##### ##### #####

np.save('../errors/LOB+LIQ.npy', {'se_train': se_train.numpy(), 'se_val': se_val.numpy(),
                                  'ape_train': ape_train.numpy(), 'ape_val': ape_val.numpy()})

np.save('../errors/LinReg_LOB+LIQ.npy', {'se_train': se_train_reg.numpy(), 'se_val': se_val_reg.numpy(),
                                         'ape_train': ape_train_reg.numpy(), 'ape_val': ape_val_reg.numpy()})

Evaluation plots

Below we have five plots for five different variables we wanted to predict. They are created by plotting the training and prediction performance of the model throughout the year for each day's trading window, which goes from the 61st trading minute until the last trading minute of the day.

These windows are ordered and stacked, starting from the first trading day's window until the last day's trading window.

On the x-axis we always have the minutes and on the y-axis the corresponding variable of the plot.

We also fit a linear regression model as a baseline against which to compare our model's validation performance; its predictions appear in the plots below as well.

In [4]:
from CODES.utils.plotter import plotter

# The five subplots differ only in target index, title, y-label, y-limits, and a
# few linewidth/alpha choices. The price axes (0-2) use thick data lines at full
# opacity; the variance axes (3-4) use thinner, semi-transparent ones.
_price_style = dict(data_lw=4, data_alpha=1, pred_lw=0.5, val_pred_lw=0.3,
                    reg_train_lw=0.3, reg_val_lw=0.1, reg_alpha=1)
_var_style = dict(data_lw=2, data_alpha=0.5, pred_lw=1, val_pred_lw=1,
                  reg_train_lw=0.1, reg_val_lw=0.1, reg_alpha=0.5)
_axes_spec = [
    (0, 'Mid Price',             'Price in TL',          [6.7, 11.3],        _price_style),
    (1, 'Bid Price Expectation', 'Price in TL',          [6.7, 11.3],        _price_style),
    (2, 'Ask Price Expectation', 'Price in TL',          [6.7, 11.3],        _price_style),
    (3, 'Bid Price Variance',    'Variance in TL$^{2}$', [-0.0002, 0.00045], _price_style if False else _var_style),
    (4, 'Ask Price Variance',    'Variance in TL$^{2}$', [-0.0003, 0.00055], _var_style),
]


def _axis_args(idx, title, ylabel, ylim, st):
    """Build the plotter argument list (one entry per attr in `attrs`) for one target."""
    val_x = range(len(y_t), len(y_t) + len(y_v))
    return [
        # training data (default x = sample index) and the model's fit on it
        [[i[idx].detach().numpy() for i in y_t], '-',
         dict(color='lightskyblue', linewidth=st['data_lw'], alpha=st['data_alpha'], fillstyle='left')],
        [[i[idx].detach().numpy() for i in y_t_pred], '-',
         dict(color='orangered', linewidth=st['pred_lw'], fillstyle='left')],
        # validation data and predictions, plotted after the training range
        [val_x, [i[idx].detach().numpy() for i in y_v], '-',
         dict(color='silver', linewidth=st['data_lw'], alpha=st['data_alpha'], fillstyle='right')],
        [range(len(y_t), len(y_t) + len(y_v_pred)), [i[idx].detach().numpy() for i in y_v_pred], '-',
         dict(color='black', linewidth=st['val_pred_lw'], fillstyle='right', alpha=1)],
        # linear-regression baseline: train fit, then validation predictions
        [range(len(y_t)), y_t_regs[:, idx], '-',
         dict(color='forestgreen', linewidth=st['reg_train_lw'], alpha=st['reg_alpha'], fillstyle='left')],
        [val_x, y_v_regs[:, idx], '-',
         dict(color='gold', linewidth=st['reg_val_lw'], alpha=st['reg_alpha'], fillstyle='right')],
        # error-summary table in the lower-right corner
        [dict(cellText=[[mse_train[idx], mape_train[idx]],
                        [mse_val[idx], mape_val[idx]],
                        [reg_mses[idx], reg_mapes[idx]]],
              rowLabels=['Training Error', 'Validation Error', 'Linear Regression Validation Error'],
              colLabels=['Mean Squared', 'Mean Absolute Percentage (%)'], loc='lower right'),
         {'row_scale': 2, 'col_scale': 0.5, 'fontsize': 16}],
        [-2000, 93000], ylim,
        ['Minutes', dict(fontsize=15)], [ylabel, dict(fontsize=15)], [title, dict(fontsize=20)],
        [dict(line_order=[[0, 2], [1, 3], [4, 5]],
              labels=('Training/Validation data', 'Learned/Predicted by Model',
                      'Learned/Predicted by Linear Regression'),
              ncol=1, shadow=1, labelspacing=0.2, fontsize=18, loc='upper left')],
        [dict(b=True, axis='y', alpha=0.5)],
    ]


args = [_axis_args(*spec) for spec in _axes_spec]

attrs = ['plot', 'plot', 'plot', 'plot', 'plot', 'plot',
         'make_table',
         'set_xlim', 'set_ylim',
         'set_xlabel', 'set_ylabel', 'set_title',
         'legend', 'grid']

plotter(args, attrs, fig_title=f'Converged Result of Model\n Stock: GARAN\n Year: 2017\n Epoch:{ckpt_name[5:-4]} ', dpi=600, ncols=1, xpad=5)  # ,save_path = os.getcwd())